rm(list=ls(all=TRUE))
graphics.off()
library(rdd)
library(xtable)
library(dplyr)

setwd() # set working directory
load("data/data.rdata")

#drop all from closed and split lists 
data_clust <- 
  data %>%
  filter(openlist == 1 & splitlist == 0) %>%
  group_by(year, muncpr, candpart) %>% 
  summarise(cluster = n() ) %>%
  ungroup() %>%
  mutate(cluster = 1:n() ) 

data_alt <- 
  data %>% 
  filter(openlist == 1 & splitlist == 0) 

data_alt <- left_join(data_alt, data_clust, by = c("year", "muncpr", "candpart"))

data_alt <- na.omit(data.frame(data_alt[c("z1", "electedt1", "rerun" , "cluster")]))
#data_alt <- subset(data_alt, z1 <= 1)
##Display distribution and test for validity with McCrary
quartz(width = 12, height = 6)
par(mfrow=c(1,2), 
    mar=c(5,4,3,1),
    cex = 1.1)

#plot density as histogram
hist(data_alt$z1, 
     main = "Distribution of forcing variable",
     xlab = "Score relative to threshold",
     breaks = 50)
box(lwd = 1.5)
abline(v=0, lty = 2, col="grey50")

#plot density with McCrary test 
McCrary <- DCdensity(data_alt$z1)

box(lwd = 1.5)
title(main = "McCrary's density test \n for forcing variable",
      ylab = "Density",
      sub = paste("N =",length(data_alt$z1)),
      xlab = "Score relative to threshold")
abline(v=0, lty = 2, col="grey50")
legend("topright", 
       paste("p(McCrary) >", 
             floor(100*McCrary)/100),
       box.lwd = 0, 
       box.col = "white", 
       bg = "white")
box(lwd = 1.5)

#find Imbens-Kalyanaraman bandwidth
IK.bw  <- IKbandwidth(data_alt$z1, data_alt$electedt1)
IK.bw2 <- IKbandwidth(data_alt$z1, data_alt$rerun)

##Estimate and plot RD 
##rerunning and elected 
rdest    <- RDestimate(electedt1 ~ z1, data = data_alt)
rdest.cl <- RDestimate(electedt1 ~ z1, data = data_alt, cluster = data_alt$cluster)

graphics.off()
quartz(w=8, h =4)
par(mfrow=c(1,2), 
    mar=c(4,4,2,1),
    cex = 1.1)

plot(rdest, range = c(-.8,.8))
title( ylab = "Pr(Rerunning and elected)", xlab = "Votes relative to threshold")
#add rugplot to illustrate point density over running var
rugz1 <- quantile(data_alt$z1, probs = 0:100/100, na.rm = TRUE)
rug(rugz1)
abline(v=0, lty = 2, col = "grey50")

#plot for rerunning
rdest1    <- RDestimate(rerun ~ z1, data = data_alt)
rdest1.cl <- RDestimate(rerun ~ z1, data = data_alt, cluster = data_alt$cluster)
plot(rdest1, range = c(-.8,.8))
title( ylab = "Pr(Rerunning)", xlab = "Votes relative to threshold")
rug(rugz1)
abline(v=0, lty = 2, col = "grey50")

##Summarize jumps in table

out1 <- cbind(rdest$est,
              rdest$se,
              rdest.cl$se,
              rdest$bw,
              rdest$obs)
colnames(out1) <- c("Estimate", "Std. Error", "Rob. Std. Error", "Bandwidth", "Observations")

out2 <- cbind(rdest1$est,
              rdest1$se,
              rdest1.cl$se,
              rdest1$bw,
              rdest1$obs)
colnames(out2) <- c("Estimate", "Std. Error", "Rob. Std. Error", "Bandwidth", "Observations")

digits_tab <- rbind(matrix(3, nrow = 4, ncol = 7), rep(0,7))

xtable(cbind(t(out1),t(out2)), digits = digits_tab)

##Find CIs

CI1 <- c(rdest$est[1] + qnorm(0.025)*rdest$se[1],
         rdest$est[1] + qnorm(0.975)*rdest$se[1])
CI2 <- c(rdest1$est[1] + qnorm(0.025)*rdest1$se[1],
         rdest1$est[1] + qnorm(0.975)*rdest1$se[1])

